import pandas as pd

# Load the order data set.
data = pd.read_csv('processed_order_train1.csv')

# Split order_date into calendar features. The column is parsed once and the
# parsed result is reused for every derived feature.
order_dates = pd.to_datetime(data['order_date'])
data['year'] = order_dates.dt.year
data['month'] = order_dates.dt.month
data['day'] = order_dates.dt.day
# dt.weekday is 0-based (Monday=0); shift it so Monday=1 ... Sunday=7.
data['weekday'] = order_dates.dt.weekday + 1

# Drop the raw date column now that its parts have been extracted.
data.drop(['order_date'], axis=1, inplace=True)

# Merge sales_region_code and item_code into a single key column.
# NOTE(review): the two codes are concatenated without a separator, so
# distinct pairs could yield the same key if code lengths vary -- confirm
# that the codes are fixed-width before relying on uniqueness.
data['region_item_code'] = (
    data['sales_region_code'].astype(str) + data['item_code'].astype(str)
)
data.drop(['sales_region_code', 'item_code'], axis=1, inplace=True)

# Merge first_cate_code and second_cate_code into a single category column
# (same separator-free concatenation as above).
data['cate_code'] = (
    data['first_cate_code'].astype(str) + data['second_cate_code'].astype(str)
)
data.drop(['first_cate_code', 'second_cate_code'], axis=1, inplace=True)

# Rename order_quantity to order_quantity1. Popping and re-assigning (rather
# than DataFrame.rename) also moves the column to the last position.
data['order_quantity1'] = data.pop('order_quantity')

# Encode sales_chan_name as 1 for 'online' and 0 for any other value
# (missing values compare unequal and therefore become 0).
data['sales_chan_name'] = (data['sales_chan_name'] == 'online').astype('int64')

# Show a preview of the processed data set.
print(data.head())

# Save the processed data to order_train1_processed.csv.
data.to_csv('order_train1_processed.csv', index=False)
